import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import pandas as pd
# Load the per-country population table from the CSV in the working directory.
# NOTE(review): under cp1252 two column names come out as "Density (P/Kmý)" and
# "Land Area (Kmý)"; byte 0xFD is "²" in cp437/cp850, so the file may really be
# in one of those code pages — confirm before relying on these column names.
data = pd.read_csv(
    filepath_or_buffer="World_Population_Data.csv",
    encoding="cp1252",
)
# Print column names, non-null counts and dtypes as a first sanity check.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 234 entries, 0 to 233 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Index 234 non-null int64 1 Country (or dependency) 234 non-null object 2 Continent 234 non-null object 3 Population (2023) 234 non-null object 4 Yearly Change 234 non-null object 5 Net Change 234 non-null object 6 Density (P/Kmý) 234 non-null object 7 Land Area (Kmý) 234 non-null object 8 Migrants (net) 234 non-null object 9 Fert. Rate 233 non-null float64 10 Med. Age 233 non-null float64 11 Urban Pop % 234 non-null object 12 World Share 234 non-null object dtypes: float64(2), int64(1), object(10) memory usage: 23.9+ KB
# Bare expression: in a notebook this renders the loaded DataFrame as the cell output.
data
| Index | Country (or dependency) | Continent | Population (2023) | Yearly Change | Net Change | Density (P/Kmý) | Land Area (Kmý) | Migrants (net) | Fert. Rate | Med. Age | Urban Pop % | World Share | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | India | Asia | 1,428,627,663 | 0.81 % | 11,454,490 | 481 | 2,973,190 | -486,136 | 2.0 | 28.0 | 36 % | 17.76 % |
| 1 | 2 | China | Asia | 1,425,671,352 | -0.02 % | -215,985 | 152 | 9,388,211 | -310,220 | 1.2 | 39.0 | 65 % | 17.72 % |
| 2 | 3 | United States | America | 339,996,563 | 0.50 % | 1,706,706 | 37 | 9,147,420 | 999,700 | 1.7 | 38.0 | 83 % | 4.23 % |
| 3 | 4 | Indonesia | Asia | 277,534,122 | 0.74 % | 2,032,783 | 153 | 1,811,570 | -49,997 | 2.1 | 30.0 | 59 % | 3.45 % |
| 4 | 5 | Pakistan | Asia | 240,485,658 | 1.98 % | 4,660,796 | 312 | 770,880 | -165,988 | 3.3 | 21.0 | 35 % | 2.99 % |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 229 | 230 | Montserrat | America | 4,386 | -0.09 % | -4 | 44 | 100 | 0 | 1.6 | 44.0 | 11 % | 0.00 % |
| 230 | 231 | Falkland Islands | America | 3,791 | 0.29 % | 11 | 0 | 12,170 | 0 | 1.6 | 40.0 | 62 % | 0.00 % |
| 231 | 232 | Niue | Oceania | 1,935 | 0.05 % | 1 | 7 | 260 | 0 | 2.4 | 36.0 | 41 % | 0.00 % |
| 232 | 233 | Tokelau | Oceania | 1,893 | 1.18 % | 22 | 189 | 10 | 0 | 2.6 | 27.0 | 0 % | 0.00 % |
| 233 | 234 | Holy See | Europe | 518 | 1.57 % | 8 | 1,295 | 0 | 0 | NaN | NaN | N.A. | 0.00 % |
234 rows × 13 columns
# Convert the two text columns used by the charts into numbers:
#   "Population (2023)" -> drop the thousands separators ("1,428,627,663")
#   "World Share"       -> drop the trailing " %" ("17.76 %")
#
# The cleaned Series is assigned back to the frame instead of calling
# ``replace(..., inplace=True)`` on ``data[col]``. That chained in-place form
# mutates a temporary column selection: pandas 2.2+ emits a FutureWarning for
# it, and with Copy-on-Write enabled it no longer updates ``data`` at all,
# which would make ``pd.to_numeric`` fail on the untouched strings.
data["Population (2023)"] = pd.to_numeric(
    data["Population (2023)"].replace(',', '', regex=True)
)
data["World Share"] = pd.to_numeric(
    data["World Share"].replace(' %', '', regex=True)
)
# Re-inspect the dtypes to confirm both columns are now numeric.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 234 entries, 0 to 233 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Index 234 non-null int64 1 Country (or dependency) 234 non-null object 2 Continent 234 non-null object 3 Population (2023) 234 non-null int64 4 Yearly Change 234 non-null object 5 Net Change 234 non-null object 6 Density (P/Kmý) 234 non-null object 7 Land Area (Kmý) 234 non-null object 8 Migrants (net) 234 non-null object 9 Fert. Rate 233 non-null float64 10 Med. Age 233 non-null float64 11 Urban Pop % 234 non-null object 12 World Share 234 non-null float64 dtypes: float64(3), int64(2), object(8) memory usage: 23.9+ KB
# World total: sum the 2023 population over every row of the table.
population_2023 = data['Population (2023)']
Toral_population = population_2023.sum()
print(f"Total Population in our planet: {Toral_population}")
Total Population in our planet: 8043901603
# Total 2023 population per continent, ordered from most to least populous.
continent_population = data.groupby('Continent')['Population (2023)'].sum()
Toral_population_by_continent = (
    continent_population
    .reset_index()  # turn the 'Continent' index back into a regular column
    .sort_values(by=['Population (2023)'], ascending=False)
)
# Bare expression: rendered as the cell output in a notebook.
Toral_population_by_continent
| Continent | Population (2023) | |
|---|---|---|
| 2 | Asia | 4751819588 |
| 0 | Africa | 1460481772 |
| 1 | America | 1043901526 |
| 3 | Europe | 741693851 |
| 4 | Oceania | 46004866 |
# Short aliases for the columns that the treemaps below take as inputs.
Country, Population, Percentage, Continent = (
    data[column_name]
    for column_name in (
        "Country (or dependency)",
        "Population (2023)",
        "World Share",
        "Continent",
    )
)
# Rendering setup for the Plotly figures.
# NOTE(review): PlotlyScope is not referenced anywhere in the visible code —
# presumably imported only to check that kaleido is installed; verify.
from kaleido.scopes.plotly import PlotlyScope
import plotly.io as pio
# Make the static "png" renderer the default for fig.show() calls that do not
# name a renderer explicitly.
pio.renderers.default = "png"
# Treemap with one tile per country: tile area follows the 2023 population and
# tile colour follows the country's share of the world population.
country_path = [px.Constant("World Population by Country"), Country]
country_colour_scale = ["#E4F1FF", "#0174BE", "#0174BE", "#4477CE"]

fig = px.treemap(
    data_frame=data,
    path=country_path,
    values=Population,
    color=Percentage,
    color_continuous_scale=country_colour_scale,
)
# Each tile shows its name, its population and its percentage of the parent node.
fig.update_traces(textinfo="label+value+percent parent")
fig.update_layout(margin={"t": 30, "l": 30, "r": 20, "b": 20})
# The colour bar is hidden; colour is only a visual cue here.
fig.update_coloraxes(showscale=False)
fig.show("notebook")
# Two-level treemap: continents at the first level, their countries nested
# inside. Area follows the 2023 population, colour follows the world share.
continent_path = [px.Constant("World Population by Continent"), Continent, Country]
continent_colour_scale = ["#D8E9F0", "#0174BE", "#0174BE", "#4477CE"]
# NOTE(review): no column named 'Percentage' appears in the table, so this
# label mapping probably has no effect — confirm the intended key.
axis_labels = {'Percentage': 'World Share'}

fig = px.treemap(
    data_frame=data,
    path=continent_path,
    values=Population,
    color=Percentage,
    color_continuous_scale=continent_colour_scale,
    branchvalues='total',
    labels=axis_labels,
)
# Show only the name and the percentage of the parent; hover output is disabled.
fig.update_traces(
    textinfo="label+percent parent",
    hovertemplate=None,
    hoverinfo="skip",
)
fig.update_layout(
    margin={"t": 30, "l": 30, "r": 20, "b": 20},
    font={"family": "verdana", "size": 14},
)
fig.update_coloraxes(showscale=False)
fig.show("notebook")
I will use it to see what share of the total world population each continent accounts for.
# Add up the per-country "World Share" percentages within each continent.
continent_share = data.groupby('Continent')["World Share"].sum()
# Move 'Continent' from the index back into a regular column.
world_share_group = continent_share.reset_index()
# Bare expression: rendered as the cell output in a notebook.
world_share_group
| Continent | World Share | |
|---|---|---|
| 0 | Africa | 18.17 |
| 1 | America | 12.97 |
| 2 | Asia | 59.08 |
| 3 | Europe | 9.24 |
| 4 | Oceania | 0.55 |
# Print the columns, non-null counts and dtypes of the per-continent summary.
world_share_group.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 2 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Continent 5 non-null object 1 World Share 5 non-null float64 dtypes: float64(1), object(1) memory usage: 208.0+ bytes
# Packed-bubble chart of each continent's share of the world population.
# NOTE(review): `packed_bubbles` is a local module not shown here — presumably
# the BubbleChart class from the matplotlib gallery example; verify.
from packed_bubbles import BubbleChart
from matplotlib import rcParams
# numpy is required for the masked label array below and is not imported
# anywhere else in this file; without it the cell fails with NameError.
import numpy as np

unique_continent = world_share_group["Continent"]
world_share_by_continent = world_share_group["World Share"]
# One colour per continent, in the row order of world_share_group.
color_continuous_scale=["#5DAE8B", '#71a9f7', "#1450A3", "#FFD00C", "#FF9C6D" ]

# Bubble areas are the continent shares; collapse() is called before plotting.
bubble_chart = BubbleChart(area=world_share_by_continent,
                           bubble_spacing=0.5)
bubble_chart.collapse()

fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"),)
# Labels are the shares rounded to 2 decimals; values for continents whose
# share is <= 1 are masked.
bubble_chart.plot(
    ax, np.ma.masked_where(world_share_by_continent <= 1, world_share_by_continent.round(2)), color_continuous_scale)

# Legend below the plot, one entry per continent, with text forced to black.
legend = plt.legend(unique_continent, loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, frameon=False)
for text in legend.get_texts():
    text.set_color("black")

# NOTE(review): these global rcParams are changed after the bubbles and legend
# have been drawn, so they may only take effect for later text or on a re-run
# of the cell — confirm whether they should be set before plotting.
font = {'size': 14}
# using rc function
plt.rc('font', **font)
COLOR = 'white'
plt.rcParams['text.color'] = COLOR

# Hide the axes and rescale the view to the drawn bubbles.
ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('"World population cover by each continent"', loc='center')
plt.show()
# Bar chart of the same per-continent shares, largest share first.
world_share_group_sorted = world_share_group.sort_values(by="World Share", ascending=False)
# The rounded share is used both as the bar height and as the bar's text label.
rounded_share = world_share_group_sorted["World Share"].round(2)

share_bars = go.Bar(
    x=world_share_group_sorted["Continent"],
    y=rounded_share,
    text=rounded_share,
    textposition='auto',
    marker={"color": '#4477CE'},
)
fig = go.Figure(data=[share_bars])
# Transparent plot and paper backgrounds.
fig.update_layout(
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    title="World population cover by each continent (Bar chart)",
)
fig.show("notebook")